/***************************************************************************************
Firm-embedded productivity and cross-country income differences
Alviarez, Cravino and Ramondo
Journal of Political Economy (2022)

Program: reg_01_naics.do
Date: October 2022

Description: Estimating the country-sector effects Anj

*****************************************************************************************/



*===============================================================================
*Estimation (Baseline: market, no export, dist/lang cluster country-sector) 
*===============================================================================
display "`zz'"
if "`zz'"=="naics4" {
local industry NAICS4 
}
if "`zz'"=="naics3" {
local industry NAICS3 
}
else {
local industry sector
}
*

*Providing a number to each sector
use "${data}/sin_firmlevel_`zz'_gravity${lf}`ending'.dta", clear
$nonmarket
keep sector1 sector
sort sector 
duplicates drop
gen sec_num=_n
sum sec_num
local sector_max=`r(max)'
display "`sector_max'"
tempfile sec_num
save `sec_num', replace


foreach y in `period' {


*-----------------------------------------------------
********************* SETTINGS ***********************
clear all
set more off
use "${data}/sin_firmlevel_`zz'_gravity${lf}`ending'.dta", clear
merge m:1 sector using "${data}/oalpha_ctte_FR.dta"
drop if _merge==2
drop _merge

*--------------------------------------------------------------
display "Dropping observations out of range for the variable of interest"
$nonmarket 
tab isocode
$cont_firms
tab isocode
$cont_firms2
tab isocode
$numctry
tab isocode
$pctiles
tab isocode
$other_restr
tab isocode
$other_restr2
tab isocode

display "Select the year: `y'"
keep if year==`y'

  
*--------------------------------------------------------------
 tempfile file_main 
save `file_main', replace


********************* SETTINGS ***********************
use `file_main', clear

display "Sample to be use (sample 0: 50 countries; sample 1: 20 countries; sample 2: 13 countries)"
*-------------------------------------------------------
if `samp'==1 { 
keep if isocode!=""
keep if isocode=="DK" | isocode=="JP" | isocode=="IT" | isocode=="KR" | isocode=="DE" | isocode=="FR" | isocode=="ES" |  isocode=="MX" |  isocode=="GB" |  isocode=="PL" |  isocode=="GR" |  isocode=="NL" |  isocode=="RO" |  isocode=="BE" |  isocode=="AT" |  isocode=="PT" |  isocode=="FI" |  isocode=="SE" |  isocode=="CZ" |  isocode=="SK" |  isocode=="BG" |  isocode=="HU" |  isocode=="HR" |  isocode=="LV" |  isocode=="SI" |  isocode=="LT" |  isocode=="EE" 
}


display "Generating LHS"
*------------------------------------------
gen lhs2=$LHS
tab isocode if lhs2!=.
keep if lhs2!=.


if "`affparent'"=="0" {
display "Keeping MNCs only"
by year sector guo_bvd isocode, sort: gen a=_n==1
replace a=0 if isocode==hq
by year sector guo_bvd, sort: egen b=total(a)
tab b 
keep if b>=2
drop a b 
tab isocode 
}
if "`affparent'"=="1" {
display "Keeping MNCs, even when having one affiliate abroad only"
by year sector guo_bvd isocode, sort: gen a=_n==1
by year sector guo_bvd, sort: egen b=total(a)
tab b 
keep if b>=1
drop a b 
tab isocode 
}


if _N==0 {
set obs 1 
save "${output}/fe_`y'.dta", replace
}

else {
tempfile temp0
save `temp0', replace 


display "Keep track of HQ and isocode"
*------------------------------------------

*Create the type variable
use `temp0', clear 
gen type=""
replace type="manuf" if sector=="Basic Metals (24-25)"
replace type="manuf" if sector=="ChePetPla (19-23)"
replace type="manuf" if sector=="ElecMach (26-28)"
replace type="manuf" if sector=="Food (10-12)"
replace type="manuf" if sector=="TexWood (13-18)"
replace type="manuf" if sector=="TranspOtherManuf (29-33)"
replace type="others" if sector=="AgrMining (A-B)"
replace type="others" if sector=="Construction (F)"
replace type="others" if sector=="Electricity (D-E)"
replace type="serv" if sector=="Accomod_Recreat (I-R-S)"
replace type="serv" if sector=="Financial_Insurance (K)"
replace type="serv" if sector=="Information (J)"
replace type="serv" if sector=="Support_Services (M-N)"
replace type="serv" if sector=="Transportation_Storage (H)"
replace type="serv" if sector=="Wholesale_Retail (G)"
replace type="nonmarket" if sector=="Education (P)"
replace type="nonmarket" if sector=="Health (Q)" 
replace type="nonmarket" if sector=="Public_Administration (O)"
replace type="nonmarket" if sector=="Real_Estate (L)"
tab year if type==""


*This keeps track of the number of countries (which can be different by year)*
by isocode, sort: gen a=_n==1
egen b=total(a)
sum b
local num_countries_iso=`r(max)'
display "`num_countries_iso'"
drop a b

*Check the number of headquarter countries
by hq, sort: gen a=_n==1
egen b=total(a)
sum b
local num_countries_hq=`r(max)'
display "`num_countries_hq'"
drop a b

*Create a Parent and Affiliate dummy
gen parent=0
replace parent=1 if isocode==hq 
tab parent
gen aff=0
replace aff=1 if isocode!=hq 
tab aff


*Create a --unit of analysis-- dataset 
*------------------------------------------
display "Create a --unit of analysis-- dataset (year-isocode-sector), excluding reference country"
preserve
keep year isocode type sector
duplicates drop
drop if isocode=="$ctryrel"
sort year isocode sector
by year, sort: gen coef=_n
tempfile t_`y'
save  `t_`y'', replace
restore

display "Create a --unit of analysis-- dataset (year-isocode-sector), including reference country"
preserve
keep year isocode type sector
duplicates drop
sort year isocode sector
by year, sort: gen coef=_n
tempfile t_`y'_fra
save  `t_`y'_fra', replace
restore

compress
tempfile baseline_file
save `baseline_file', replace 



*===============================================================================
*Preparing for regression 
*===============================================================================
use `baseline_file', clear
set matsize 11000

egen ctryseccomb=concat(isocode sector), p("/")
tabulate ctryseccomb, generate(iso2sec)
drop ctryseccomb


*Country-sector dummy
levelsof isocode, local(iso2) 
local start=0
foreach xx of local iso2 {
preserve 
keep if isocode=="`xx'"
keep sector
duplicates drop
merge 1:1 sector using `sec_num', keepusing(sec_num)
drop if _merge==2
drop _merge
rename sec_num id
sum id
local sse=`r(N)'
levelsof id, local(id2) 
restore

foreach ii of local id2 {
display "`xx'; `ii'"
local start=`start'+1
display "iso2sec`start'; iso`xx'sec`ii'"
rename iso2sec`start' iso`xx'sec`ii'
}
}

*Interaction dummies (parent): (country-sector dummy x parent dummy)
foreach i of varlist iso*sec* {
display "`i'"
gen p`i'=`i'*parent
       local x : variable label `i'
label variable p`i' "`x'"
}
*Interaction dummies (affiliate): (country-sector dummy x affiliate dummy)
foreach i of varlist iso*sec* {
display "`i'"
gen a`i'=`i'*aff
       local x : variable label `i'
label variable a`i' "`x'"
}

*----------------------------------------------------------
*Adding taxes and bilateral trade aggrements (BTT) 
*----------------------------------------------------------
gen iso2_ctry1=isocode
gen iso2_ctry2 =hq 
merge m:1 iso2_ctry1 iso2_ctry2 using "${data}/taxes_ratio.dta", keepusing(BTT ln_tax_ratio ln_tax_ratio2)
replace BTT=0 if _merge==1
tab _merge
tab BTT
tab BTT if _merge!=2
drop if _merge==2
drop _merge

*----------------------------------------------------------
*Sector specific gravity variables (distance and language)  
*----------------------------------------------------------
tabulate sector, generate(sec)
gen distance2=(distance)^2

levelsof sector, local(sectorcount)
local sec_count=`: word count `sectorcount''
display "`sec_count'"

forvalues i=1(1)`sec_count' {
display "sec`i'"
gen  dist_sec`i'=sec`i'*distance
gen  dist2_sec`i'=sec`i'*distance2
gen  dist3_sec`i'=1
gen lang_sec`i'=sec`i'*comlang_off
gen lang3_sec`i'=1
gen BTT_sec`i'=sec`i'*BTT
gen tax_ratio_sec`i'=sec`i'*ln_tax_ratio
gen tax_ratio2_sec`i'=sec`i'*ln_tax_ratio2
drop sec`i'
}
drop distance2


*GUO-industry FE, bilateral-industry FE and cluster
egen dyadicFE=concat(isocode hq `industry'), p("/")
egen guoFE=group(guo_bvd `industry')
egen hqsecFE=group(hq `industry')
gen guoFEcte=1
egen double_cluster=group(isocode sector)


*Bring the gdd per capita (add the differences in GDP per capita (gdp_{i}-gdp_{n}) as a bilateral control: it may be harder to transfer knwoledge from rich to poor) 
*----------------------------------------------
preserve
use "${data}/WDI_clean_version2020.dta", clear
keep isocode year gdp_pc_ppp gdp_pc_curr
drop if gdp_pc_ppp==. & gdp_pc_curr==.
duplicates drop 
tempfile gdppc
save `gdppc', replace
restore  
merge m:1 isocode year using `gdppc'
drop if _merge==2
drop _merge
rename gdp_pc_ppp gdp_pc_ppp_iso
rename gdp_pc_curr gdp_pc_curr_iso
rename isocode isotemp
rename hq isocode 
merge m:1 isocode year using `gdppc'
drop if _merge==2
rename gdp_pc_ppp gdp_pc_ppp_hq
rename gdp_pc_curr gdp_pc_curr_hq
rename isocode hq
rename isotemp isocode
gen gdp_pc_ppp_in=ln(abs(gdp_pc_ppp_hq-gdp_pc_ppp_iso)+1) 
gen gdp_pc_curr_in=ln(abs(gdp_pc_curr_hq-gdp_pc_curr_iso)+1)
drop _merge

save "${data}/pre_reg0_`title'_`y'.dta", replace



*===============================================================================
*Regression 
*===============================================================================
*Drop the dummies corresponding to the country of reference in order to have country FE relative to the Reference Country (France in this case)
use "${data}/pre_reg0_`title'_`y'.dta", clear
local countwithref=0
foreach v of varlist aiso*sec* {
display "`v'"
local countwithref=`countwithref'+1
       local x : variable label `v'
        if strpos("`x'", "$ctryrel/")>0 drop `v' 
}
display "`countwithref'"

*Counting once the ref country is dropped. 
local countnoref=0
foreach v of varlist aiso*sec* {
local countnoref=`countnoref'+1
display "`v'"
}
display "`countnoref'"

if $guoFEonly==1 {
foreach v of varlist iso*sec* {
replace `v'=1
}
foreach v of varlist piso*sec* {
replace `v'=1
}
}
*


*====================================================================================
display "Saving each predicted component (before start) for variance decomposition calculation"
*====================================================================================
preserve
local subset "$ind_var"
unab subset : `subset'
scalar LLL=wordcount("`subset'")

reghdfe lhs2 `subset'  aiso*sec* piso*sec*, abs(`absFEreg', savefe) vce(cluster double_cluster) residuals(resid)
gen esample=e(sample)
tab esample
keep if esample==1
drop esample

mat b=e(b)
mat b=b[1,1..LLL]
mat score grav_hat=b if e(sample)
keep year type sector1 sector `industry' hq guo_bvd isocode *hat 
duplicates drop
replace year=`y' if year==.
order guo_bvd year 
save "${data}/gravity_`y'.dta", replace
restore


*====================================================================================
reghdfe lhs2 aiso*sec* piso*sec* $ind_var, abs(`absFEreg', savefe) vce(cluster double_cluster) residuals(resid)
gen esample=e(sample)
tab esample
keep if esample==1
drop esample
tab BTT

*xb fitted values
predict xb1, xb
*xb + d_absorbvars
predict xbd1, xbd
*d_absorbvars
predict d1, d
*Constant 
local ctte=_b[_cons]

gen R2=`e(r2)'
gen R2adj=`e(r2_a)'
gen R2w=`e(r2_within)'
gen R2wadj=`e(r2_a_within)'
sum R2 R2adj R2w R2wadj 
drop R2 R2adj R2w R2wadj 
save "${data}/reg0_`title'_`y'.dta", replace


*-----------------------------------------
display "Creating variable of interest "
*-----------------------------------------
use "${data}/reg0_`title'_`y'.dta", clear  
preserve

*Number of foreign affiliates 
by year sector isocode guo_bvd, sort: gen a=_n==1
replace a=0 if isocode==hq
by year sector isocode, sort: egen num_aff=total(a)
drop a 
label variable num_aff "Number of foreign affiliates in a ctry-sector pair"

*From how many countries they are coming from 
by year isocode sector hq, sort: gen a=_n==1
replace a=0 if isocode==hq
by year isocode sector, sort: egen num_aff_ctry=total(a)
drop a 
label variable num_aff_ctry "Number of sources where foreign affiliates come from in each ctry-sector pair"

*Number of parents in each country-sec pair 
by year isocode sector guo_bvd, sort: gen a=_n==1
replace a=0 if isocode!=hq
by year isocode sector, sort: egen num_parents=total(a)
drop a 
label variable num_parents "Number of local parents from that ctry-sector pair"

keep year type sector1 sector isocode num_aff num_aff_ctry num_parents
duplicates drop 
sort year isocode type sector1 sector 
tempfile variables_of_interest
save `variables_of_interest', replace

restore
*------------------------------------------



*===============================================================================
*Saving estimated coeffients
*===============================================================================

display "Saving the Error term"
*-----------------------------------------
preserve
keep year type sector1 sector `industry' hq guo_bvd isocode resid xb1 xbd1 d1 lhs2
duplicates drop
replace year=`y' if year==.
order guo_bvd year 
save "${data}/residual_`y'.dta", replace
restore
*-----------------------------------------

display "Saving FIRM FIXED EFFECTS"
*-----------------------------------------
preserve
keep year type sector1 sector `industry' hq guo_bvd __hdfe1__
rename __hdfe1__ firm_FE
duplicates drop
replace year=`y' if year==.
order guo_bvd year 
save "${data}/firmFE_`y'.dta", replace
restore
drop __hdfe1__
*------------------------------------------

display "Number of Obs and R2's"
*-----------------------------------------
preserve 
gen num_obs=`e(N)'
gen num_full=`e(N_full)' 
gen num_sing=`e(num_singletons)' 
gen mss=`e(mss)'
gen mdf=`e(df_m)'
gen rss=`e(rss)'
gen rdf=`e(df_r)'
gen rms=`e(rmse)'
gen num_FE=`e(N_hdfe)'
gen R2=`e(r2)'
gen R2adj=`e(r2_a)'
gen R2w=`e(r2_within)'
gen R2wadj=`e(r2_a_within)'
keep year num_obs num_full num_sing mss mdf rss rdf rms num_FE R2 R2adj R2w R2wadj
duplicates drop 
format %9.0fc num_obs num_full num_sing mss mdf rss rdf rms num_FE
format %9.2fc R2* 
save "${data}/R2_`y'.dta", replace
restore 
*------------------------------------------

display "Saving the DAj and DPj"
*-----------------------------------------
mata: b=st_matrix("e(b)")'
mata: se=sqrt(diagonal(st_matrix("e(V)"))) 
getmata b se, force 
keep b se
drop if b==.
gen coef=_n
gen year=`y'

preserve
keep if coef<=`countnoref'
save "${data}/mfe_`y'.dta", replace
restore

preserve
local last=`countwithref'+`countnoref'
keep if coef>=`countnoref'+1 & coef<=`last'
drop coef
gen coef=_n
display "Check that no sector has all location as parents"
tab coef if b==. | b==0
save "${data}/xfe_`y'.dta", replace
restore

preserve
local last=`countwithref'+`countnoref'
keep if coef>=`last'+1 & coef< _N
drop coef
gen coef=_n
tab coef if b==. | b==0
save "${data}/gravitycoef_`y'.dta", replace
restore

preserve
keep if  _n == _N
drop coef
gen coef=_n
rename b ctte
rename se se_ctte
save "${data}/ctte_`y'.dta", replace
restore

*LOCATION FE
*-----------------
clear all
use "${data}/mfe_`y'.dta", clear
rename b fe_location
replace fe_location=. if fe_location==0
rename se se_location
replace se_location=. if se_location==0
merge 1:1 year coef using `t_`y''
drop if _merge==2
drop _merge
drop coef 
tempfile mfe
save `mfe', replace

*SOURCE FE
*-----------------
clear all
use "${data}/xfe_`y'.dta", clear
merge 1:1 year coef using `t_`y'_fra' 
drop if _merge==2
drop _merge
rename b fep_location
replace fep_location=. if fep_location==0
rename se sep_location
replace sep_location=. if sep_location==0

gen fep_fra=fep_location if isocode=="$ctryrel"
by sector, sort: egen a=max(fep_fra) 
drop fep_fra
rename a fep_fra
gen fe_inter=fep_location-fep_fra
drop coef 
tempfile xfe
save `xfe', replace

*Combining location and source FE (+ variables of interest)
*-----------------
use `mfe', clear
merge 1:1 year type sector isocode using `xfe', keepusing(fep_location sep_location fe_inter) 
drop _merge
merge 1:1 year type isocode  sector using `variables_of_interest', keepusing(sector1 num_aff num_aff_ctry num_parents) 
drop if _merge==2
drop _merge
merge m:1 year using "${data}/R2_`y'.dta", keepusing(num_obs num_full num_sing mss mdf rss rdf rms num_FE R2 R2adj R2w R2wadj)
drop if _merge==2
drop _merge
merge m:1 year using "${data}/ctte_`y'.dta", keepusing(*ctte*)
drop if _merge==2
drop _merge


*Fillig from France in the master
foreach i in fe_location se_location {
replace `i'=0 if isocode=="$ctryrel" & `i'==. 
}
replace year=`y' if year==.

*Filling the sector1 sector (also number of obs and R2's) 
foreach i in sector1 sector {
display "`i'"
by year sector, sort: egen a=mode(`i'), maxmode
drop `i'
rename a `i'
}
foreach i in num_obs num_full num_sing mss mdf rss rdf rms num_FE R2 R2adj R2w R2wadj {
display "`i'"
by year sector, sort: egen a=max(`i')
drop `i'
rename a `i'
}
*

*Creating DA and DP 
*---------------------------------------------------
gen DP=fe_inter
gen DA=fe_location
gen D_Waugh=fe_location*(-1)
gen D_EK=(fe_inter)*(-1)

foreach x in DP DA D_Waugh D_EK {
replace `x'=0 if isocode=="$ctryrel"
}

order isocode year type sector1 sector fe_location se_location fep_location sep_location fe_inter D_EK D_Waugh DA DP 
compress
save "${data}/fe_`y'.dta", replace
erase "${data}/mfe_`y'.dta"
erase "${data}/xfe_`y'.dta"

}
}
*


*-------------------------------------------------
*Append (all datasets) across years and add type
*-------------------------------------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/gravity_`y'.dta"
}
drop if year==.
gen table="`title'"
save "${data}/gravity_`title'.dta", replace 

*-----------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/gravitycoef_`y'.dta"
}
drop if year==.
gen table="`title'"
rename coef sec_num


local xx=wordcount("${ind_var}")
if `xx'==2 {
gen variable="distance" if sec_num<=`sector_max'
replace variable="language" if sec_num>`sector_max'

gen xx= sec_num-19
replace sec_num=xx if xx>0
merge m:1 sec_num using `sec_num'
drop if _merge==2
drop _merge
drop xx

rename b coef_
rename se se_ 
capture reshape wide coef_ se_ , i( sector ) j( variable ) string
order table sector1 sector sec_num year 
sort sector1 sec_num
save "${data}/gravitycoef_`title'.dta", replace 
}


*-----------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/residual_`y'.dta"
}
drop if year==.
gen table="`title'"
save "${data}/residual_`title'.dta", replace 

*-----------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/firmFE_`y'.dta"
}
drop if year==.
gen table="`title'"
save "${data}/firmFE_`title'.dta", replace 

*-----------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/R2_`y'.dta"
}
drop if year==.
gen table="`title'"
save "${data}/R2_`title'.dta", replace 

*-----------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/fe_`y'.dta"
}
drop if year==.
gen table="`title'"
save "${data}/estimates_sec_`title'.dta", replace 

*-----------------------
clear all
set obs 1
foreach y in `period' {
append using "${data}/ctte_`y'.dta"
}
drop if year==.
gen table="`title'"
save "${data}/ctte_`title'.dta", replace 



*======================================================
*Erase intermediate files 
*======================================================
foreach y in `period' {
display "`y'"
capture erase "${data}/gravity_`y'.dta"
capture erase "${data}/residual_`y'.dta"
capture erase "${data}/firmFE_`y'.dta"
capture erase "${data}/R2_`y'.dta"
capture erase "${data}/fe_`y'.dta"
capture erase "${data}/estimates_sec_`title'_`y'.dta"
capture erase "${data}/ctte_`y'.dta" 
capture erase "${data}/gravitycoef_`y'.dta"
capture erase "${data}/reg0_`title'_`y'.dta"
}
*




